//  Date:         	06/02/2017
//  task:         	education analysis
//  project:      	World Development
//  user-written: 	sutex, sxpose, center 

* Start from a clean session: close any log left open by an earlier run,
* then wipe macros and everything held in memory.
capture log close
macro drop _all
clear all
set matsize 800
set more off

//  #0:  setup
**************************
* Run the helper do-file first; make_index_gr is called further down in this
* script (presumably the program is defined in that file -- confirm).
do make_index_gr.do

* Open the text log for this run, overwriting any previous copy
log using Prepare_school_QuasiControl, replace text

//  #1: load data
****************************************************
use ubridge_schools.dta

* Define quasi-control schools: discard the observations coded treat==0, then
* recode the observations without a cluster_id as the new comparison group.
drop if treat == 0
replace treat = 0 if cluster_id == .

* School type (q23): replace each row with the school-level mean so the value
* is constant within schoolid; schools with no information at all get code 3.
rename q23 schooltype
tempvar typemean
bysort schoolid: egen `typemean' = mean(schooltype)
replace schooltype = `typemean'
drop `typemean'
replace schooltype = 3 if schooltype == .

* Distribution of school type by arm, plus chi-squared and Fisher exact tests
bysort treat: tabulate schooltype
tabulate schooltype treat, chi2 exact

* 0/1 indicators for each school-type code
generate cath    = (schooltype == 2)
generate cou     = (schooltype == 3)
generate noaffil = (schooltype == 1)

* Difference in means of each indicator between the two arms
foreach dummy in cath cou noaffil {
	ttest `dummy', by(treat)
}

//  #2: drop variables that have all missing values and clearly unnecessary 
****************************************************
* findname is user-written (not installed in Stata by default). It returns in
* r(varlist) the variables whose values are missing in every observation.
findname, all(missing(@))
local allmissing `r(varlist)'

* Guard: -drop- with an empty varlist is an error, so only drop when findname
* actually matched something.
if "`allmissing'" != "" {
	drop `allmissing'
}
drop treat_original NEAR_FID quasicontrol_school

* Place the remaining GIS variables immediately before V_deo_visit_rec
foreach var in ORIG_FID NEAR_DIST NEAR_X NEAR_Y {
	move `var' V_deo_visit_rec
}

// #3: rename variables too long to store in regression 
****************************************************
rename V_teacher_absent_avg Teacher_Absent
rename BE_q9_q14_rec V_teachers_outside

* Flip the sign of V_teachers_outside
replace V_teachers_outside = -V_teachers_outside

* Shorten the three students-per-supply variables
forvalues k = 1/3 {
	rename V_students_per_supply`k' students_supply`k'
}
	
	
// #4: select vars for analysis 
****************************************************
* Three outcome families; these locals are reused by the reshape and by the
* balance table further down, so their names must stay as they are.
local monitoring V_deo_ever_call V_insp_calls V_deo_visit_rec V_isp_visit_rec  
local effort Teacher_Absent V_present_teach_e V_perc_alotwritten V_perc_Engaged V_school_staff_meet  
local input V_n_teachers V_teach_transf_to students_supply1 students_supply2 students_supply3 


// #5: keep only vars for analysis
****************************************************
keep `monitoring' `effort' `input' treat schoolid V_audit_period cluster_id district schooltype noaffil

* Monitoring outcomes
label variable V_deo_visit_rec "DEO visits"
label variable V_deo_ever_call "DEO calls"
label variable V_isp_visit_rec "Inspector visits"
label variable V_insp_calls "Inspector calls"

* Effort outcomes
* NOTE(review): Teacher_Absent is labelled "% Teachers present" -- confirm the
* direction of the underlying variable matches this label.
label variable Teacher_Absent "% Teachers present (records)"
label variable V_present_teach_e "% Teachers present (observed)"
label variable V_perc_alotwritten "Meaningful board"
label variable V_perc_Engaged "Teacher engaged"
label variable V_school_staff_meet "Staff meetings"

* Input outcomes
label variable V_n_teachers "N. teachers employed"
label variable V_teach_transf_to "Teachers transferred to school"
label variable students_supply1 "Students per uniform"
label variable students_supply2 "Students per book"
label variable students_supply3 "Students per pencil"

* Stash every outcome's label in a local named l<varname> so it can be
* re-attached after the reshape; fall back to the variable name when the
* label is empty.
foreach name in `monitoring' `effort' `input' {
	local l`name' : variable label `name'
	if `"`l`name''"' == "" {
		local l`name' "`name'"
	}
}

* Treatment indicator: variable label and value labels
label variable treat "Treatment"
label define treat 0 "Quasi-Control" 1 "Treatment"
label values treat treat

* Audit period value labels
label define period 1 "Base" 2 "mid" 3 "end", modify
label values V_audit_period period

*********************************************************	
// #7: reshape data
*********************************************************

* One row per school: each outcome gets a suffix 1/2/3 for the audit period.
reshape wide `monitoring' `effort' `input' , i(schoolid) j(V_audit_period) 

* Re-attach the labels saved in the l<varname> locals before the reshape,
* tagging each with its survey round.
* Fix: the round-3 tag previously read "(endine)"; it is now "(endline)".
foreach v in `monitoring' `effort' `input' {
	label var `v'1 "`l`v'' (baseline)"
	label var `v'2 "`l`v'' (midline)"
	label var `v'3 "`l`v'' (endline)"
}

*********************************************************
// #8: table for balance of outcomes variable in baseline 
*********************************************************

local monitoring V_deo_ever_call V_insp_calls V_deo_visit_rec V_isp_visit_rec  
local effort Teacher_Absent V_present_teach_e V_perc_alotwritten V_perc_Engaged V_school_staff_meet  
local input V_n_teachers V_teach_transf_to students_supply1 students_supply2 students_supply3 

tempfile balance

* Regress each baseline (suffix 1) outcome on treat and stack the results in
* the temporary file. regsave is user-written (not installed in Stata by
* default). `mode' is empty on the first pass, which creates the file, and
* "append" on every later pass.
local mode
foreach outcome in `monitoring' `effort' `input' {
	regress `outcome'1 treat
	local outlab : variable label `outcome'1
	regsave using "`balance'", addlabel(Variable,"`outlab'") pval table(`outcome', parentheses(stderr) format(%8.2fc)) `mode'
	local mode "append"
}

* Shape the stacked results into a table. Nothing between preserve and
* restore saves or exports it, so the table is discarded on restore.
preserve
	use "`balance'", clear
	replace var = subinstr(var, "_coef", "", 1)
	replace var = "" if strpos(var, "stderr") != 0

	* Transpose the data (sxpose is user-written, not installed by default)
	sxpose, clear

	* Keep and order only the wanted columns (original note: "dropping
	* constant sd"). Column positions are hard-coded from the regsave layout.
	drop _var2 _var5 _var6 _var8
	order _var9 _var4 _var1 _var3 _var7

	* The first row holds the column names: strip a leading underscore,
	* rename each column after its first-row value, then drop that row.
	unab cols : _var9 - _var7
	foreach col of local cols {
		replace `col' = substr(`col', 2, .) if regexm(`col', "^_")
		local newname = `col'[1]
		rename `col' `newname'
	}
	drop in 1

	label variable cons "C mean"
	label variable treat "T-C"
	label variable treat_pval "P-val"
	label variable N "N"
restore

*********************************************************
// #10: Construct education indices
*********************************************************

* Monitoring outcomes for each round, stored in globals monitoring1-3
forvalues i=1(1)3{
	gl monitoring`i' V_deo_ever_call`i' V_insp_calls`i' V_deo_visit_rec`i' V_isp_visit_rec`i' 
}

* For every monitoring outcome:
*   1. build c_<var>, standardised with the quasi-control (treat==0) mean/sd;
*   2. impute missing c_<var> with the treated-group mean of c_<var> for
*      treated schools and with 0 for quasi-control schools;
*   3. impute missing raw values with the mean of the school's own arm.
*
* Fix: the conditions used to read "treat==1 & x==. | var==.d". Because & binds
* tighter than |, that parsed as "(treat==1 & x==.) | var==.d", so every
* observation coded .d matched regardless of arm. In particular the
* "c_<var> = 0" step overwrote the treated-mean imputation for treated schools
* coded .d. The missing-value tests are now parenthesised so each replace is
* restricted to its own arm.
foreach var in $monitoring1 $monitoring2 $monitoring3{
	quietly summarize `var' if treat==0
	local `var'_mean= r(mean)
	local `var'_sd= r(sd) 
	gen c_`var' = (`var'-``var'_mean')/``var'_sd'
	qui egen mean_std_`var'=mean(c_`var') if treat==1
	replace c_`var' = mean_std_`var' if treat==1 & (c_`var'==. | `var'==.d)
	replace c_`var' = 0 if treat==0 & (c_`var'==. | `var'==.d)
	qui egen mean_`var'1=mean(`var') if treat==1
	replace `var' = mean_`var'1 if treat==1 & (`var'==. | `var'==.d)
	qui egen mean_`var'0=mean(`var') if treat==0
	replace `var' = mean_`var'0 if treat==0 & (`var'==. | `var'==.d)
	}

* Check the standardised and raw components after imputation
forvalues i=1(1)3{	
	su  c_V_deo_ever_call`i' c_V_insp_calls`i' c_V_deo_visit_rec`i' c_V_isp_visit_rec`i' 
	su  V_deo_ever_call`i' V_insp_calls`i' V_deo_visit_rec`i' V_isp_visit_rec`i' 
}

* Arguments passed to make_index_gr: unit weights, and the quasi-control
* schools as the standardisation group
gen wgt	=1
gen stdgroup=treat==0
	
* Per round: correlations and Cronbach's alpha of the standardised components,
* an equal-weight index (row mean of the c_ variables), and the make_index_gr
* index, followed by a comparison of the two.
forvalues i=1(1)3{	
	corr  c_V_deo_ever_call`i' c_V_insp_calls`i' c_V_deo_visit_rec`i' c_V_isp_visit_rec`i' 
	alpha c_V_deo_ever_call`i' c_V_insp_calls`i' c_V_deo_visit_rec`i' c_V_isp_visit_rec`i' , std item
	egen monitoring_index`i'=            rowmean(c_V_deo_ever_call`i' c_V_insp_calls`i' c_V_deo_visit_rec`i' c_V_isp_visit_rec`i')	
	make_index_gr edu_monitoring`i' wgt stdgroup V_deo_ever_call`i'     V_insp_calls`i'   V_deo_visit_rec`i' V_isp_visit_rec`i'
	corr monitoring_index`i' index_edu_monitoring`i' 
	su monitoring_index`i' index_edu_monitoring`i' if treat==0
	}

*****************
*education effort
*****************

* Effort outcomes for each round, stored in globals effort1-3
forvalues i=1(1)3{
	gl effort`i' Teacher_Absent`i' V_present_teach_e`i' V_perc_alotwritten`i' V_perc_Engaged`i' V_school_staff_meet`i' 
}

* For every effort outcome:
*   1. build c_<var>, standardised with the quasi-control (treat==0) mean/sd;
*   2. impute missing c_<var> with the treated-group mean of c_<var> for
*      treated schools and with 0 for quasi-control schools;
*   3. impute missing raw values with the mean of the school's own arm.
*
* Fix: the conditions used to read "treat==1 & x==. | var==.d". Because & binds
* tighter than |, that parsed as "(treat==1 & x==.) | var==.d", so every
* observation coded .d matched regardless of arm, and the "c_<var> = 0" step
* overwrote the treated-mean imputation for treated schools coded .d. The
* missing-value tests are now parenthesised so each replace is restricted to
* its own arm.
foreach var in $effort1 $effort2 $effort3{
	quietly summarize `var' if treat==0
	local `var'_mean= r(mean)
	local `var'_sd= r(sd) 
	gen c_`var' = (`var'-``var'_mean')/``var'_sd'
	qui egen mean_std_`var'=mean(c_`var') if treat==1
	replace c_`var' = mean_std_`var' if treat==1 & (c_`var'==. | `var'==.d)
	replace c_`var' = 0 if treat==0 & (c_`var'==. | `var'==.d)
	qui egen mean_`var'1=mean(`var') if treat==1
	replace `var' = mean_`var'1 if treat==1 & (`var'==. | `var'==.d)
	qui egen mean_`var'0=mean(`var') if treat==0
	replace `var' = mean_`var'0 if treat==0 & (`var'==. | `var'==.d)
	*corr `var' c_`var'
	}

* Check the standardised and raw components after imputation
forvalues i=1(1)3{	
	su  c_Teacher_Absent`i' c_V_present_teach_e`i' c_V_perc_alotwritten`i' c_V_perc_Engaged`i' c_V_school_staff_meet`i' 
	su  Teacher_Absent`i' V_present_teach_e`i' V_perc_alotwritten`i' V_perc_Engaged`i' V_school_staff_meet`i' 
}

* index using Anderson (2008)	
* The round-2 index is built without V_school_staff_meet2, matching the
* round-2 equal-weight index below.
	make_index_gr edu_effort1 wgt stdgroup  Teacher_Absent1 V_present_teach_e1 V_perc_alotwritten1 V_perc_Engaged1 V_school_staff_meet1
	make_index_gr edu_effort2 wgt stdgroup  Teacher_Absent2 V_present_teach_e2 V_perc_alotwritten2 V_perc_Engaged2
	make_index_gr edu_effort3 wgt stdgroup  Teacher_Absent3 V_present_teach_e3 V_perc_alotwritten3 V_perc_Engaged3 V_school_staff_meet3
	
* Index using Kling et al., (2007)
* Rounds 1 and 3 only (step of 2); round 2 is handled separately below.
forvalues i=1(2)3{
	corr c_Teacher_Absent`i' c_V_present_teach_e`i' c_V_perc_alotwritten`i' c_V_perc_Engaged`i' c_V_school_staff_meet`i'
	alpha c_Teacher_Absent`i' c_V_present_teach_e`i' c_V_perc_alotwritten`i' c_V_perc_Engaged`i' c_V_school_staff_meet`i', std item	
	egen effort_index`i'=rowmean(c_Teacher_Absent`i' c_V_present_teach_e`i' c_V_perc_alotwritten`i' c_V_perc_Engaged`i' c_V_school_staff_meet`i')									
	corr effort_index`i' index_edu_effort`i'
	}

* Round 2: blank out the standardised staff-meeting component and build the
* equal-weight index from the remaining four components.
	replace c_V_school_staff_meet2=.
	egen effort_index2=rowmean(c_Teacher_Absent2 c_V_present_teach_e2 c_V_perc_alotwritten2 c_V_perc_Engaged2)									
	corr effort_index2 index_edu_effort2
	su effort_index2 index_edu_effort2 if treat==0
	
******************
* education inputs
******************

* Input outcomes for each round, stored in globals inputs1-3
forvalues i=1(1)3{
	gl inputs`i' V_n_teachers`i' V_teach_transf_to`i' students_supply1`i' students_supply2`i' students_supply3`i' 
}

* For every input outcome:
*   1. build c_<var>, standardised with the quasi-control (treat==0) mean/sd;
*   2. impute missing c_<var> with the treated-group mean of c_<var> for
*      treated schools and with 0 for quasi-control schools;
*   3. impute missing raw values with the mean of the school's own arm.
foreach var in $inputs1 $inputs2 $inputs3{
	quietly summarize `var' if treat==0
	local `var'_mean= r(mean)
	local `var'_sd= r(sd) 
	gen c_`var' = (`var'-``var'_mean')/``var'_sd'
	qui egen mean_std_`var'=mean(c_`var') if treat==1
	replace c_`var' = mean_std_`var' if treat==1 & c_`var'==.
	replace c_`var' = 0 if treat==0 & c_`var'==.
	qui egen mean_`var'1=mean(`var') if treat==1
	replace `var' = mean_`var'1 if treat==1 & `var'==.
	qui egen mean_`var'0=mean(`var') if treat==0
	replace `var' = mean_`var'0 if treat==0 & `var'==.
	}

* Check the standardised and raw components after imputation
forvalues i=1(1)3{	
	su  c_V_n_teachers`i' c_V_teach_transf_to`i' c_students_supply1`i' c_students_supply2`i' c_students_supply3`i' 
	su  V_n_teachers`i' V_teach_transf_to`i' students_supply1`i' students_supply2`i' students_supply3`i' 
}

* index using Anderson (2008)
* Fix: the round-3 index previously listed students_supply22 (the round-2
* variable) where students_supply23 belongs. Building the varlist from the
* loop index removes that copy-paste error.
forvalues i=1(1)3{
	make_index_gr edu_input`i' wgt stdgroup  V_n_teachers`i' V_teach_transf_to`i' students_supply1`i' students_supply2`i' students_supply3`i' 
}
 
* Index using Kling et al., (2007)
* Fix: alpha now uses c_V_teach_transf_to, so it is computed on the same
* standardised components that enter the row-mean index.
forvalues i=1(1)3{	
	alpha c_V_n_teachers`i' c_V_teach_transf_to`i' c_students_supply1`i' c_students_supply2`i' c_students_supply3`i', std item	
	egen input_index`i'=rowmean(c_V_n_teachers`i' c_V_teach_transf_to`i' c_students_supply1`i' c_students_supply2`i' c_students_supply3`i')									
	corr input_index`i' index_edu_input`i'
	}
 
	
*********************************************************
// #12: save 
*********************************************************
* Attach a dataset-level note and a data label, then reset the data signature
* so it reflects the dataset as saved.
notes _dta: ubridge_schools_quasicontrol.dta
label data "updated education dataset (quasi control): 05062018"
datasignature set, reset

* Save in an older .dta format, overwriting any previous copy
saveold ubridge_schools_quasicontrol.dta, replace

* Close the log, clear the data in memory and stop the do-file
log close
clear
exit